*Purpose: Matching 

clear

include "S:\AEC\PactoRestoration\Code\globals.do"

**Packages
net install balanceplot, from("https://tdmize.github.io/data/balanceplot")
ssc install covbal
ssc install psmatch2


*** *PROGRAM ***
**Program Inputs: 1 = state

*** Matching ***
capture program drop matching
program matching 

*Import Grid Data
use "$out_grids\MASTER_Grids_Period.dta", clear

**Drop Cells Within 500m Buffer (Spillover Main)
drop if spillover500==1 & treat!=1
rename spillover500 spillover

**Drop Cells Within 1000m Buffer (Spillover Robustness)
*drop if spillover1000==1 & treat!=1
*rename spillover1000 spillover

**Protected Areas Designated as of 2000
gen PA_relevant = PA if PA==1 & PA_status=="Designated" & PA_status_yr <= 2000
	replace PA_relevant=0 if PA_relevant!=1
	
*Select Country
keep if state=="`1'"
keep if period=="pre"

**Assess PreMatch Balance
*Graph
balanceplot NetOverNF DEF_Recover DEF_Loss COV_FOREST COV_NONFOREST elevation_avg_meters slope_avg_degrees NearCity_dist_km PA_matching, group(treat) graphop(title("Pre-Match Balance")) nosort table
	graph export "$out_grids\Matching/`1'_PreMatch_Balance.png", replace
*Table
covbal treat DEF_Recover DEF_Loss COV_FOREST COV_NONFOREST elevation_avg_meters slope_avg_degrees NearCity_dist_km PA_matching, saving("$out_grids\Matching\PreMatchBalanceTable_`1'.dta", replace)

*** *Propensity Score Matching with Exact Match on Municipality using "psmatch2" ***
*Drop observations if a municipality has zero treated units
bysort cd_mun: egen treated_in_municip = total(treat)
drop if treated_in_municip == 0

*Matching by municipality using a loop that separately matches and saves a file for each municipality 
egen mun = group(municip)
	drop if inlist(mun,9,10) & state=="EspiritoSanto" //No treated observations in these municipalities
	drop if inlist(mun,4) & state=="MinasGerais" //The treated cells in this municipality have no variation in NetOverNF
	drop if inlist(mun,2,3,4,6,8,10,11) & state=="SaoPaulo" //No treated observations in these municipalities. Or no variation in NetOverNF
levelsof mun, local(munic)
display `munic'
foreach j in `munic' {
		
	*Matching
	display `j'	//Print the municipality it's currently running 
	count if mun==`j' & treat==1 //To see how many treated cells we lose from matching (compare to after)
	
	*Use psmatch - EXCLUDE COV_4 (NonVegetated Area) to prevent "dummy variable trap" since Coverage Vars alwasy sum to 2
	psmatch2 treat COV_1 COV_2 COV_3 COV_5 PV_Suppress SV_Recover SV_Suppress elevation slope PA_matching if mun==`j', out(NetOverNF)
	codebook _treated if treat==1 & mun==`j'
	codebook _support if treat==1 & mun==`j'
	
	*Keep only observations from municipality in which matching was just done 
	preserve
	keep if mun==`j'
	
	*Drop unmatched controls 
	gen pair = _id if _treated==0
	replace pair = _n1 if _treated==1
	bysort pair: egen paircount = count(pair)
	drop if paircount <2
	count if treat==1
	
	*Save temporary file of this municipality
	tempfile `j'_match
	save ``j'_match.dta'
	restore
}

*Append the matched municipality-level datasets together
gen mun2 = mun
	replace mun2 = . if mun==1
levelsof mun2, local(munic2)
display `munic2'

use `1_match.dta', clear
foreach j in `munic2' {
	append using ``j'_match.dta' 
}

*Create unique ID by municipality and matching id
egen unique_mun_match = group(mun _id)

*Sort so that each matched pair is next to one another
sort mun pair _treated

*Save matched file of state
save "$out_grids\Matching\Matching_`1'.dta", replace 

end

*** Run Matching Program for Each State 
matching Bahia 
matching EspiritoSanto 
matching Parana 
matching MinasGerais 
matching RioDeJaneiro
matching SaoPaulo 


***________ Create MASTER MATCHED File ____________***
*Merge together the state datasets that have the matching. Remember these only have the pre-period information. 
use "$out_grids\Matching\Matching_Bahia.dta", clear 
foreach v in EspiritoSanto Parana MinasGerais RioDeJaneiro SaoPaulo{
	append using "$out_grids\Matching\Matching_`v'.dta"
}

*Create unique id across all cells 
gen cell_uniqueID = _n

*Keep the merge keyvars and the matching vars 
keep state state2 cell cell_uniqueID treated_in_municip mun _pscore _treated _support _weight _NetOverNF _id _n1 _nn _pdif pair paircount unique_mun_match

*Save
save "$out_grids\Matched_All.dta", replace 

*Use MASTER File, and merge the Matching data
use "$out_grids\MASTER_Grids_Period.dta", clear
merge m:1 state state2 cell using "$out_grids\Matched_All.dta"

*Drop treated and untreated units that have no match. 
	drop if _m==1
	drop _m
	
*Sort and Order
sort state pair cell _treat
order cell_uniqueID, after(cell) 

drop treated_in_municip
assert spillover!=1 if treat==0
drop spillover*

*Proteted Areas Fill-In
replace PA = 0 if PA==. 

*Label Variables
capture program drop labels1
program labels1
label var cell_uniqueID							"Cell Unique ID (No Repeats)"
label var mun									"Municipalities Used for Matching"
label var _pscore								"Propensity Score"
label var _treated								"=0 for control; =1 for treated"
label var _support								"Common Support Indicator"
label var _weight								"Frequency of Use as a Match"
label var _NetOverNF							"Outcome Defined for Matching"
label var _id									"Unique ID from Matching"
label var _n1									"Unique ID of the Nearest Neighbor Match"
label var _nn									"Number of Matched Controls"
label var pair									"ID to Identify Matched Pairs"
label var paircount								"Identifies Pairs Using Same Untreated Match"
label var unique_mun_match						"Unique Municipality-Level Matching ID "
end
labels1 


**Create "treat_post" 
capture program drop TP
program TP
gen treat_post = treat*post 
	order treat_post, after(treat)
	label var treat_post "Treat x Post"
end
TP

*Scale Adjustments
capture program drop scale
program scale
gen elevation_km = elevation_avg_meters/1000
gen slope_100deg = slope_avg_degrees/100
gen NearCity_dist_100km = NearCity_dist_km/100
end 
scale 

**Define Interactions
capture program drop interact
program interact
gen elevation_post = elevation_km*post
	label var elevation_post "Elevation x Post"
gen elevation_treat = elevation_km*treat
	label var elevation_treat "Elevation x Treat"
gen elevation_treatpost = elevation_km*treat_post
	label var elevation_treatpost "Elevation x Treat x Post"
gen slope_post = slope_100deg*post 
	label var slope_post "Slope x Post"
gen slope_treat = slope_100deg*treat 
	label var slope_treat "Slope x Treat"
gen slope_treatpost = slope_100deg*treat_post
	label var slope_treatpost "Slope x Treat x Post"
gen Nearcitydist_post = NearCity_dist_100km*post 
	label var Nearcitydist_post "Dist to City x Post"
gen Nearcitydist_treat = NearCity_dist_100km*treat
	label var Nearcitydist_treat "Dist to City x Treat"
gen NearCitydist_treatpost = NearCity_dist_100km*treat_post
	label var NearCitydist_treatpost "Dist to City x Treat x Post"
global interactions elevation_post elevation_treat elevation_treatpost slope_post slope_treat slope_treatpost Nearcitydist_post Nearcitydist_treat NearCitydist_treatpost
global tp_interactions slope_treatpost Nearcitydist_post NearCitydist_treatpost slope_post elevation_post Nearcitydist_post
end 
interact 

*Adjust Labels
capture program drop labels2
program labels2 
label var post "Post"
label var treat "Treat"
label var elevation_km "Elevation (km)"
label var slope_100deg "Slope (deg '00s)"
label var NearCity_dist_100km "Dist to City (km '00s)"
label var NearCity_dist_km "Dist to City (km)"
end 
labels2

*Protected Areas
capture program drop PAs
program PAs
gen PA_relevant = PA if PA==1 & PA_status=="Designated" & PA_status_yr <= 2000
	replace PA_relevant=0 if PA_relevant!=1
	label var PA_relevant "Protected Areas Designated by 2000"
gen PA_treat = PA_relevant*treat
	label var PA_treat "Protected x Treat"
gen PA_post = PA_relevant*post
	label var PA_post "Protected x Post"
gen PA_treatpost = PA_relevant*treat_post
	label var PA_treatpost "Protected x Treat x Post"
	
gen PA_designated = PA if PA==1 & PA_status=="Designated"
	replace PA_designated=0 if PA_designated!=1
	label var PA_designated "Protected Areas Designated at any time"
gen PA_designated_treat = PA_designated*treat
	label var PA_designated_treat "Designated PA x Treat"
gen PA_designated_post = PA_designated*post
	label var PA_designated_post "Designated PA x Post"
gen PA_designated_treatpost = PA_designated*treat_post
	label var PA_designated_treatpost "Designated PA x Treat x Post"
end
PAs

*Order
order 	elevation_km elevation_post elevation_treat elevation_treatpost elevation_avg_meters ///
		slope_100deg slope_post slope_treat slope_treatpost slope_avg_degrees ///
		NearCity_dist_100km Nearcitydist_post Nearcitydist_treat NearCitydist_treatpost NearCity_dist_km NearCity NearCity_population NearCity_code ///
		LandSurface_code LandSurface Bioclimate_code Bioclimate Geology_code Geology EcosystemGroup EcosystemGroup_code ///
		PA_relevant PA_treat PA_post PA_treatpost PA_designated PA_designated_treat PA_designated_post PA_designated_treatpost ///
		PA PA_wdpaid PA_name PA_orig_name PA_desig PA_desig_eng PA_rep_area PA_status PA_status_yr PA_gov_type PA_own_type PA_mang_auth ///
		, after(COV_6NonObserved)
		
*Censor the NetOverNF Outcome so that values greater than 1 (from remote sensing tabulation) are set to 1
replace NetOverNF = 1 if NetOverNF > 1 & NetOverNF != .

*Save MASTER MATCHED File 
save "$out_grids\MASTER_MATCHED.dta", replace 		  // <-----------------------------------------------MASTER_MATCHED



***________ Create MASTER MATCHED YEAR Panel ____________***
*Use MASTER File, and merge the Matching data
use "$out_grids\Matched_ALL.dta", clear
merge 1:m cell state state2 using "$out_grids\MASTER_Grids_Year.dta"

*Drop treated and untreated units that have no match. 
	* There are 7,179 treated cells that have no match. They were excluded from Probit for various reasons
	* There are several million untreated cells that have no match 
	keep if _m==3
	drop _m
	
*Sort and Order
sort state pair cell _treat year
order cell_uniqueID, after(cell) 
order treat treat_area treat_percent, after(post)
order treated_in_municip mun _pscore _treated _support _weight _NetOverNF _id _n1 _nn _pdif pair paircount unique_mun_match, after(cd_mun)

*Label Variables
label var year "Year" 
labels1

*Proteted Areas 
replace PA = 0 if PA==. 

*Drop Outcomes that require Base Year (Since this is a by-year file)
drop NetOverNF RecOverNF

*Final Adjustments (programs from above)
scale
interact
labels2
PAs

order 	elevation_km elevation_post elevation_treat elevation_treatpost elevation_avg_meters ///
		slope_100deg slope_post slope_treat slope_treatpost slope_avg_degrees ///
		NearCity_dist_100km Nearcitydist_post Nearcitydist_treat NearCitydist_treatpost NearCity_dist_km NearCity NearCity_population NearCity_code ///
		LandSurface_code LandSurface Bioclimate_code Bioclimate Geology_code Geology EcosystemGroup EcosystemGroup_code ///
		PA_relevant PA_treat PA_post PA_treatpost ///
		PA PA_wdpaid PA_name PA_orig_name PA_desig PA_desig_eng PA_rep_area PA_status PA_status_yr PA_gov_type PA_own_type PA_mang_auth ///
		, after(COV_6NonObserved)

*Save MASTER MATCHED Year Panel 
save "$out_grids\MASTER_MATCHED_Year.dta", replace    // <-----------------------------------------------MASTER_MATCHED_Year




